library(plotly)
## Warning: package 'plotly' was built under R version 3.5.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(quantmod)
## Warning: package 'quantmod' was built under R version 3.5.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.5.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.5.2
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(pracma)
## Warning: package 'pracma' was built under R version 3.5.3
library(randtests)
## Warning: package 'randtests' was built under R version 3.5.2
library(vrtest)
## Warning: package 'vrtest' was built under R version 3.5.2
library(ggplot2)
Structure: Three checks and the simulation below them
I decided to simulate a few situations with Amazon stocks. So, I went to https://www.macrotrends.net/stocks/charts/AMZN/amazon/stock-price-history and chose some cases. I attached a very general picture “3 checks on Monte Carlo simulation” for a visualization of a given case. I tried to describe each of them in words, so I hope there won’t be any confusions about the dates, prices and the other stuff.
First two cases weren’t so rough, because they contain a few shifts. The last one is more interesting, so I broke it into several pieces: I tried to see how the simulation changes when we change the train set for an average return for a stock and its percentual volatility. And one more comment: I used several datasets because they contain different periods for Amazon, I should have to merge them, but I was a little bit out of time, and I didn’t know how to do it correctly. So, let’s start.
n - numbers of days that we will simulate. After 180 there was a sort of “unexpected” decrease due to graph Amazon stock prices.
So, I want to check if this simulation can include this little “unexpected” shift.
amzn <- read.csv("data/AMZN_data.csv", header=TRUE)
amzn.1 <- amzn %>% filter(as.Date(amzn$date) < as.Date("2013-11-01"))
amzn.1.plot <- amzn %>% filter(as.Date(amzn$date) < as.Date("2014-08-01"))
last.day.1 <- amzn.1 %>% filter(as.Date(amzn.1$date) == as.Date("2013-10-31"))
last.day.1
## date open high low close volume Name
## 1 2013-10-31 361.73 366 359 364.03 2466937 AMZN
n.1 <- 250 # number of days to a decrease
stock_mu.1 <- mean(exp(diff(log(amzn.1$close))) - 1)
stock_sigma.1 <- sd(amzn.1$close) / mean(amzn.1$close)
stock_price.1 <- last.day.1$close
plot_ly(x = ~amzn.1.plot$date, type="ohlc",
open = ~amzn.1.plot$open, close = ~amzn.1.plot$close,
high = ~amzn.1.plot$high, low = ~amzn.1.plot$low) %>%
layout(title = "Simulation started at 2013-10-31 up to 2014-08-01 (9 months)")
The results for first check are:
10% quantile - 325.8308 5% quantile - 316.2274 1% quantile - 298.1402
Due to the graph, the lowest price in this period was approximately 300. So the worst case scenario was not achieved, but only one percent quantile bounded it
Second case, when I used a Monte Carlo simulation
amzn <- read.csv("data/AMZN_data.csv")
amzn.2 <- amzn %>% filter(as.Date(amzn$date) < as.Date("2015-07-30"))
last.day.2 <- amzn.2 %>% filter(as.Date(amzn.2$date) == as.Date("2015-07-29"))
last.day.2
## date open high low close volume Name
## 1 2015-07-29 530.92 532.9655 525.02 529 3752634 AMZN
amzn.2.plot <- amzn %>% filter(as.Date(amzn$date) < as.Date("2016-03-20"))
plot_ly(x = ~amzn.2.plot$date, type="ohlc",
open = ~amzn.2.plot$open, close = ~amzn.2.plot$close,
high = ~amzn.2.plot$high, low = ~amzn.2.plot$low) %>%
layout(title = " (Simulation started at 2015-07-29 up to 2016-02-25 (5 months)")
n.2 <- 130
stock_mu.2 <- mean(exp(diff(log(amzn.2$close))) - 1)
stock_sigma.2 <- sd(amzn.2$close) / mean(amzn.2$close)
stock_price.2 <- last.day.2$close
The result are: 10% quantile - 426.3757 5% quantile - 401.7373 1% quantile - 356.3271
The actual price was approximately 485. In this case, I think the most magnitude to the correct result gave a big bunch of observations (there was a long positive shift, so volatility expected to be much bigger). It seems to me that it is a bad choice of the previous period because estimated prices are far away from real prices. But nevertheless, the model bounded the price correctly
It was very easy, my results were 205.2773 for 1% and 381.5740 for 5%. But for practical cases is also useless to take such a big period as a train set, because our average expected the return and expected percentual stock volatility would be so to say very widely estimated
amzn.3 <- read.csv("data/AMZN.2010.2018.csv", header=TRUE) # I used this data for a third check 1) that I was explaining
amzn.3.year <- amzn.3 %>% filter(as.Date(amzn.3$Date) > as.Date("2017-09-04"))
amzn.3.halfayear <- amzn.3 %>% filter(as.Date(amzn.3$Date) > as.Date("2018-03-04"))
amzn.3.3month <- amzn.3 %>% filter(as.Date(amzn.3$Date) > as.Date("2017-06-04"))
last.day.3 <- amzn.3 %>% filter(as.Date(amzn.3$Date) == as.Date("2018-09-04"))
n.3 <- 245
stock_mu.3.3month <- mean(exp(diff(log(amzn.3.3month$Close))) - 1) #I was changing an argument due to appropriate dataset.
stock_sigma.3.3month <- sd(amzn.3.3month$Close) / mean(amzn.3.3month$Close) #I was changing an argument due to appropriate dataset.
stock_price.3 <- last.day.3$Close
Comment: there is no plot for the last case because the dataset is bounded by 04-09-2018. So I just get real prices from the link above The biggest fall during this 245 days was 1377.45.
Results from modelling Third check 2) are:
if we train our return and volatility for one year: 10% quantile - 1530.436 5% quantile - 1423.938 1% quantile - 1213.667
We can see, that if we considered only 5% quantile, we would be wrong because the real price was lower by 46,5 points. But 1% quantile did a good job So, it seems maybe one year period is enough.
if we train our return and volatility for a half a year: 10% quantile - 1804.014 5% quantile - 1747.076 1% quantile - 1626.050
We can see, that choosing half a year for training our dataset was a very bad idea because at 17-12-19 we would lose 248.6$ per share. The reason could be that the last half a year to a 2018-09-04 wasn’t so volatile, so average return and percentual volatility became relatively small
I felt that it is a bad idea to continue with three months, but surprisingly for me I get: 10% quantile- 1462.357 5% quantile - 1346.68 1% quantile - 1121.60
It is interesting, but three month period did a better job than the one year model at 5% quantile. But I guess it (3-month period model) has a problem with 1% quantile because it is too far from the real price. Of course, it bounded the real price well, but the distance between them is very large, so if we are doing some optimisation stuff, the three months model can be not optimal, so it is better to use one year model, I guess.
I used some approximation (with the number of days for simulation, for example). The problem is that in real life simulations it is very important to have the right inputs because people can lose real money.
sp500 <- read.csv("data/S&P500.csv", header = TRUE)
sp500.2010 <- sp500 %>% filter(as.Date(sp500$Date) < as.Date("2010-01-01"))
sp500.2008 <- sp500 %>% filter(as.Date(sp500$Date) < as.Date("2008-01-01"))
ggplot(sp500.2010, aes(x=as.Date(sp500.2010$Date), y=sp500.2010$Close)) + geom_line() +
xlab("Date") +
ylab("Close price") + ylim(400, 1600) +
ggtitle("CLose price of S&P500 and simulated loss", subtitle="Even our best guess 1003.404 is bitten by S&P500 stock price
dashed lines indicates the staring/ending (start of 2008 and middle of 2009)
date of simulation") +
geom_hline(yintercept=1003.404, color="red") +
geom_vline(xintercept=as.Date("2008-01-01"), color="blue", linetype="dashed") +
geom_vline(xintercept=as.Date("2009-05-26"), color="blue", linetype="dashed")
last.day.price.2008.01.02<- 1447.160034
n.sp500 <- 450
stock_price.sp500 <- last.day.price.2008.01.02
sp500.mu.until.2008 <- mean(exp(diff(log(sp500.2008$Close))) - 1)
sp500.sigma.until.2008 <- sd(sp500.2008$Close) / mean(sp500.2008$Close)
We received: 1% 5% 10% 1003.404 1106.563 1172.015
It is strange that 2sigma rule did a better job in bounding that Monte Carlo. Maybe, the problem in lack of dates for training a model or I fail to construct a good simulation (I tried to take a much bigger dataset - from 1990 year, but the result was nut much better).
The reason why I did choose to do Monte Carlo simulation is I was thinking that if stock returns follow Random Walk process, I can simulate it and obtain good boundings. But two test for RW returned very small p-value, so I rejected the null hypothesis under 1% confidence level. The point is, maybe, 2sd rule gave a better bounding then my simulation, because of nonrandomness of market price returns - 2008 crisis is not a very random event (as we know from its reasons), that’s why such fall can not be simulated by random process.
Epsilon - random part of a simulation Stock price calculated as a quantile of a normal distribution
stock_return <- function(stock_price, n, stock_mu, stock_sigma){
delta_t <- 1/n # one period
for (i in seq(n)){
epsilon <- runif(n=1, min=0, max=1)
stock_price <- stock_price * (1 + qnorm(epsilon,
stock_mu * delta_t,
stock_sigma* sqrt(delta_t)))
}
return(stock_price)
}
simulations <- 10000
# Monte Carlo simulations
set.seed(100)
stock_prices <- c()
for (i in seq(simulations)){
stock_prices <- c(stock_prices,
stock_return(stock_price=stock_price.sp500,
n=n.sp500,
stock_mu=sp500.mu.until.2008,
stock_sigma=sp500.sigma.until.2008))
}
quantile(stock_prices, c(.01, .05, .1))
## 1% 5% 10%
## 1003.404 1106.563 1172.015